{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "!pip install python-dotenv\n",
    "!pip install datasets\n",
    "!pip install langchain\n",
    "!pip install neo4j\n",
    "!pip install llama-index"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import os\n",
    "from dotenv import load_dotenv\n",
    "import logging\n",
    "import sys\n",
    "from llama_index.llms import OpenAI\n",
    "from llama_index import ServiceContext\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "api_key = os.environ['OPENAI_API_KEY']\n",
    "\n",
    "logging.basicConfig(stream=sys.stdout, level=logging.INFO)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "from llama_index import (\n",
    "    KnowledgeGraphIndex,\n",
    "    ServiceContext,\n",
    "    SimpleDirectoryReader,\n",
    "    Document\n",
    ")\n",
    "from llama_index.storage.storage_context import StorageContext\n",
    "from llama_index.graph_stores import Neo4jGraphStore\n",
    "from llama_index.llms import OpenAI\n",
    "from IPython.display import Markdown, display\n",
    "from langchain_community.document_loaders import PyPDFLoader"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = PyPDFLoader('../data/Football_news.pdf')\n",
    "pages = loader.load_and_split()\n",
    "\n",
    "#convert langchain document to llama_index document format\n",
    "documents = [Document(text=content.page_content) for content in pages]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "#creating neo4j graph store\n",
    "\n",
    "url = os.environ['NEO4J_URI']\n",
    "username = os.environ['NEO4J_USERNAME']\n",
    "password = os.environ['NEO4J_PASSWORD']\n",
    "database = \"neo4j\"\n",
    "\n",
    "graph_store = Neo4jGraphStore(\n",
    "    username=username,\n",
    "    password=password,\n",
    "    url=url,\n",
    "    database=database,\n",
    ")\n",
    "\n",
    "storage_context = StorageContext.from_defaults(graph_store=graph_store)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# define LLM and chunk size\n",
    "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo\")\n",
    "service_context = ServiceContext.from_defaults(llm=llm, chunk_size=200)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# without embeddings\n",
    "\n",
    "index = KnowledgeGraphIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=storage_context,\n",
    "    max_triplets_per_chunk=3,\n",
    "    service_context=service_context,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "query_engine = index.as_query_engine(\n",
    "    include_text=False, \n",
    "    response_mode=\"tree_summarize\"\n",
    ")\n",
    "response = query_engine.query(\"Between which teams friendlies was played\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<i>Miami and the Hong Kong select team played friendlies.</i>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<i>{response}</i>\"))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#with embeddings\n",
    "\n",
    "index = KnowledgeGraphIndex.from_documents(\n",
    "    documents,\n",
    "    storage_context=storage_context,\n",
    "    max_triplets_per_chunk=2,\n",
    "    service_context=service_context,\n",
    "    include_embeddings=True,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [],
   "source": [
    "query_engine = index.as_query_engine(\n",
    "    include_text=True,\n",
    "    response_mode=\"tree_summarize\",\n",
    "    embedding_mode=\"hybrid\",\n",
    "    similarity_top_k=5,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "response = query_engine.query(\"Between which teams friendlies was played\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/markdown": [
       "<i>Miami played friendlies against a Hong Kong select team and Japanese side Vissel Kobe.</i>"
      ],
      "text/plain": [
       "<IPython.core.display.Markdown object>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "display(Markdown(f\"<i>{response}</i>\"))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Without embeddings - \n",
    "\n",
    "Node count -21\n",
    "Relation count - 15\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "With embeddings - \n",
    "\n",
    "Node count - 28\n",
    "Relation count - 21"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "base",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
